{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 封装函数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import HTMLSession\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "import pandas as pd\n",
    "session =HTMLSession()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "def requests_liepin( url, params):\n",
    "    r = session.get( url , params = payload)\n",
    "\n",
    "    # 先取特定元素, 精准打击其子后辈\n",
    "    主要元素 = r.html.xpath( '//ul[@class=\"sojob-list\"]/li')\n",
    "\n",
    "    # 作为xpath字典，键为我要抓的牛肉名称，值为xpath\n",
    "    dict_xpaths={ \n",
    "        'text': {\n",
    "            'edu':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]',\n",
    "            '经验':      '//div[contains(@class,\"job-info\")]/p/span[@class=\"edu\"]/following-sibling::span',\n",
    "            '薪水':    '//div[contains(@class,\"job-info\")]/p/span[@class=\"text-warning\"]', \n",
    "            '时间':    '//div[contains(@class,\"job-info\")]/p/time/@title', \n",
    "            '职称':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司地点': '//div[contains(@class,\"job-info\")]/p/a',\n",
    "            '公司名称': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        },\n",
    "        'text_content': {\n",
    "        },\n",
    "        'href': {\n",
    "            '链结':    '//div[contains(@class,\"job-info\")]/h3/a', \n",
    "            '公司URL': '//div[contains(@class,\"sojob-item-main\")]//p[@class=\"company-name\"]/a', \n",
    "        }\n",
    "    }\n",
    "\n",
    "    def get_e_text_content(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [e.xpath(_xpath_)[0].lxml.text_content() for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_text(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [\"\".join([x.strip() if type(x) is str else x.text.strip() for x in e.xpath(_xpath_)]) for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    def get_e_href(_xpath_):\n",
    "        # 高级列表推导\n",
    "        暂存结果 = [list(e.xpath(_xpath_, first=True).absolute_links)[0] \\\n",
    "                   if len(e.xpath(_xpath_, first=True).absolute_links) >= 1  \\\n",
    "                   else \"\" for e in 主要元素]\n",
    "        return(暂存结果)\n",
    "\n",
    "    # 只对主要元素下进行.xpath取值\n",
    "    数据字典 = dict()\n",
    "\n",
    "    数据字典 = {k:get_e_text_content(v) for k,v in dict_xpaths['text_content'].items()}\n",
    "    数据字典.update({k:get_e_text(v) for k,v in dict_xpaths['text'].items()})\n",
    "    数据字典.update({k:get_e_href(v) for k,v in dict_xpaths['href'].items()})\n",
    "\n",
    "    数据 = pd.DataFrame(数据字典)\n",
    "    #数据.to_excel(\"20春_Web数据挖掘_week03_liepin.xlsx\", sheet_name=\"搜查结果\")\n",
    "    return (数据)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 指定公司（腾讯）"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [0]},\n",
       " 1: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [1]},\n",
       " 2: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [2]},\n",
       " 3: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [3]},\n",
       " 4: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [4]},\n",
       " 5: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [5]},\n",
       " 6: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [6]},\n",
       " 7: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [7]},\n",
       " 8: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [8]},\n",
       " 9: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15°radeFlag=0'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['e00cb734d40bd332252a53c1292340e9'],\n",
       "  'curPage': [9]}}"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url=\"https://www.liepin.com/zhaopin/?compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=7983148&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_prime&d_ckId=e00cb734d40bd332252a53c1292340e9&d_curPage=0&d_pageSize=40&d_headId=e00cb734d40bd332252a53c1292340e9\"\n",
    "r=session.get(url)\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "翻页url={x.text:x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)}\n",
    "翻页url\n",
    "href_翻页=[x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "df=pd.DataFrame([urlparse(x) for x in href_翻页 ])\n",
    "df_qs=pd.DataFrame([{ k:v[0] for k,v in parse_qs(x).items()}for x in df['query']])\n",
    "df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) \n",
    "df_qs\n",
    "长度=int(df_qs.curPage_int.max()+1)\n",
    "def 翻页_参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "\n",
    "翻页_参数模板=翻页_参数拆解(href_翻页[0])\n",
    "翻页_参数模板\n",
    "def 翻页_参数调整(key,curPage,compIds):\n",
    "    参数=翻页_参数模板.copy()\n",
    "    参数['key']=key\n",
    "    参数['curPage']=curPage\n",
    "    参数['compIds']=compIds\n",
    "    return 参数\n",
    "指定公司_关键词_翻页参数={i:翻页_参数调整(key=['广告'],compIds=['7983148'],curPage = [i])\n",
    "                                   for i in  range(0,长度)\n",
    "                                  }\n",
    "\n",
    "指定公司_关键词_翻页参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pageSize</th>\n",
       "      <th>sortFlag</th>\n",
       "      <th>compIds</th>\n",
       "      <th>key</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>7863078</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>10095329</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>7983148</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9630160</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>1072424</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>2036768</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>12176713</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9131484</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9543080</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8653060</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8668446</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9272548</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>12253761</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>10099719</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>4787049</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9837971</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>591850</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8132113</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8628096</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8586548</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>884492</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>6128386</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>7865459</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>7856955</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9747025</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>5964833</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9352611</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8608772</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>2174886</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8017827</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>7891846</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>2615574</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>9844505</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8364657</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>8644766</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>7c31c6749ef587e86cf571fb87b5eafc</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pageSize sortFlag   compIds key  \\\n",
       "0        40       15   7863078  广告   \n",
       "1        40       15  10095329  广告   \n",
       "2        40       15   7983148  广告   \n",
       "3        40       15   9630160  广告   \n",
       "4        40       15   1072424  广告   \n",
       "5        40       15   2036768  广告   \n",
       "6        40       15  12176713  广告   \n",
       "7        40       15   9131484  广告   \n",
       "8        40       15   9543080  广告   \n",
       "9        40       15   8653060  广告   \n",
       "10       40       15   8668446  广告   \n",
       "11       40       15   9272548  广告   \n",
       "12       40       15  12253761  广告   \n",
       "13       40       15  10099719  广告   \n",
       "14       40       15   4787049  广告   \n",
       "15       40       15   9837971  广告   \n",
       "16       40       15    591850  广告   \n",
       "17       40       15   8132113  广告   \n",
       "18       40       15   8628096  广告   \n",
       "19       40       15   8586548  广告   \n",
       "20       40       15    884492  广告   \n",
       "21       40       15   6128386  广告   \n",
       "22       40       15   7865459  广告   \n",
       "23       40       15   7856955  广告   \n",
       "24       40       15   9747025  广告   \n",
       "25       40       15   5964833  广告   \n",
       "26       40       15   9352611  广告   \n",
       "27       40       15   8608772  广告   \n",
       "28       40       15   2174886  广告   \n",
       "29       40       15   8017827  广告   \n",
       "30       40       15   7891846  广告   \n",
       "31       40       15   2615574  广告   \n",
       "32       40       15   9844505  广告   \n",
       "33       40       15   8364657  广告   \n",
       "34       40       15   8644766  广告   \n",
       "\n",
       "                                            siTag       d_sfrom  \\\n",
       "0   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "1   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "2   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "3   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "4   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "5   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "6   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "7   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "8   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "9   us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "10  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "11  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "12  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "13  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "14  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "15  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "16  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "17  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "18  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "19  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "20  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "21  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "22  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "23  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "24  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "25  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "26  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "27  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "28  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "29  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "30  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "31  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "32  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "33  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "34  us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw  search_prime   \n",
       "\n",
       "                              d_ckId d_curPage d_pageSize  \\\n",
       "0   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "1   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "2   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "3   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "4   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "5   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "6   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "7   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "8   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "9   7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "10  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "11  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "12  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "13  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "14  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "15  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "16  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "17  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "18  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "19  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "20  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "21  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "22  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "23  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "24  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "25  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "26  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "27  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "28  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "29  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "30  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "31  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "32  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "33  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "34  7c31c6749ef587e86cf571fb87b5eafc         0         40   \n",
       "\n",
       "                            d_headId  \n",
       "0   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "1   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "2   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "3   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "4   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "5   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "6   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "7   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "8   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "9   7c31c6749ef587e86cf571fb87b5eafc  \n",
       "10  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "11  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "12  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "13  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "14  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "15  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "16  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "17  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "18  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "19  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "20  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "21  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "22  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "23  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "24  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "25  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "26  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "27  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "28  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "29  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "30  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "31  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "32  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "33  7c31c6749ef587e86cf571fb87b5eafc  \n",
       "34  7c31c6749ef587e86cf571fb87b5eafc  "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'字节跳动': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['7863078'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '湖北拉思特网络科技有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['10095329'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '腾讯': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['7983148'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京字节跳动网络技术有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9630160'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '阿里巴巴': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['1072424'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '美团点评': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['2036768'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '猎聘招聘顾问团队': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['12176713'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京多彩互动广告有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9131484'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京大禹创联商贸有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9543080'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '绿城房地产建设管理集团有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8653060'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京新潮文化传媒有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8668446'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '成都新潮传媒集团有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9272548'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '上海煜融广告有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['12253761'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '山东微程信息技术有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['10099719'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " 'vivo': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['4787049'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '四川小板科技有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9837971'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '58同城': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['591850'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '赢天下科技': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8132113'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '剧星传媒': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8628096'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '上海微盟': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8586548'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " 'Baidu': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['884492'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '搜狐新媒体': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['6128386'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '软通动力': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['7865459'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '新意互动': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['7856955'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '京东商城': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9747025'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '网易集团': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['5964833'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京字节跳动科技有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9352611'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '广东奥园商业地产集团有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8608772'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '小米': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['2174886'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '赞意': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8017827'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京尚德在线教育科技有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['7891846'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '群邑(上海)广告有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['2615574'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京锐卡安讯科技有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['9844505'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '龙湖集团': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8364657'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']},\n",
       " '北京五八到家信息技术有限公司': {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'compIds': ['8644766'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['7c31c6749ef587e86cf571fb87b5eafc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['7c31c6749ef587e86cf571fb87b5eafc']}}"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url=\"https://www.liepin.com/zhaopin/?industries=&subIndustry=&dqs=&salary=&jobKind=&pubTime=&compkind=&compscale=&searchType=1&isAnalysis=&sortFlag=15&d_headId=837c686e528ef0155173a3137a2bae9b&d_ckId=837c686e528ef0155173a3137a2bae9b&d_sfrom=search_prime&d_curPage=0&d_pageSize=40&siTag=bFGQTbwE_AAQSb-u11jrBw~fA9rXquZc5IkJpXC-Ycixw&key=%E5%B9%BF%E5%91%8A\"\n",
    "r=session.get(url)\n",
    "指定公司url=r.html.xpath(\"//dd[@class='comp-list']/ul[@class='clearfix']/li/a\")\n",
    "指定公司url\n",
    "公司字典={a.xpath(\"a/span/text()\")[0]:a.xpath(\"a/@href\")[0]for a in 指定公司url}\n",
    "公司字典\n",
    "href_列表 = [x.xpath('a/@href')[0] for x in 指定公司url]\n",
    "href_列表\n",
    "#分解url\n",
    "\n",
    "df= pd.DataFrame([urlparse(x) for x in href_列表])\n",
    "df.nunique()\n",
    "df_qs=pd.DataFrame([{k:v[0]for k,v in parse_qs(x).items()}for x in df['query']])\n",
    "display(df_qs)\n",
    "df_qs.nunique()\n",
    "#compIds 不同\n",
    "#取一例做参数模板\n",
    "def 参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "\n",
    "参数模板=参数拆解(href_列表[0])\n",
    "参数模板\n",
    "def 参数调整(key,compIds):\n",
    "    参数=参数模板.copy()\n",
    "    参数['key']=key\n",
    "    参数['compIds']=compIds\n",
    "    return 参数\n",
    "公司代号字典={k:参数拆解(v)['compIds'][0]for k,v in 公司字典.items()}\n",
    "公司代号字典\n",
    "\n",
    "修改关键词={k:参数调整(compIds=[v],key=['广告'])for k,v in 公司代号字典.items()}\n",
    "修改关键词"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'字节跳动': '7863078',\n",
       " '湖北拉思特网络科技有限公司': '10095329',\n",
       " '腾讯': '7983148',\n",
       " '北京字节跳动网络技术有限公司': '9630160',\n",
       " '阿里巴巴': '1072424',\n",
       " '美团点评': '2036768',\n",
       " '猎聘招聘顾问团队': '12176713',\n",
       " '北京多彩互动广告有限公司': '9131484',\n",
       " '北京大禹创联商贸有限公司': '9543080',\n",
       " '绿城房地产建设管理集团有限公司': '8653060',\n",
       " '北京新潮文化传媒有限公司': '8668446',\n",
       " '成都新潮传媒集团有限公司': '9272548',\n",
       " '上海煜融广告有限公司': '12253761',\n",
       " '山东微程信息技术有限公司': '10099719',\n",
       " 'vivo': '4787049',\n",
       " '四川小板科技有限公司': '9837971',\n",
       " '58同城': '591850',\n",
       " '赢天下科技': '8132113',\n",
       " '剧星传媒': '8628096',\n",
       " '上海微盟': '8586548',\n",
       " 'Baidu': '884492',\n",
       " '搜狐新媒体': '6128386',\n",
       " '软通动力': '7865459',\n",
       " '新意互动': '7856955',\n",
       " '京东商城': '9747025',\n",
       " '网易集团': '5964833',\n",
       " '北京字节跳动科技有限公司': '9352611',\n",
       " '广东奥园商业地产集团有限公司': '8608772',\n",
       " '小米': '2174886',\n",
       " '赞意': '8017827',\n",
       " '北京尚德在线教育科技有限公司': '7891846',\n",
       " '群邑(上海)广告有限公司': '2615574',\n",
       " '北京锐卡安讯科技有限公司': '9844505',\n",
       " '龙湖集团': '8364657',\n",
       " '北京五八到家信息技术有限公司': '8644766'}"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "公司代号字典"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 实现 一个公司一个搜索关键词翻页搜索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "广告 10\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>序</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>30620-腾讯广告微信广告游戏行业运营经理（深圳）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1925954135.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>30633-腾讯广告高级视觉设计师（设计中心 深圳）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1929793323.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>30626-腾讯广告渠道策略分析经理（上海）</td>\n",
       "      <td>上海</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1927617379.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月17日</td>\n",
       "      <td>30624-腾讯广告华东食品饮料行业广告销售经理（上海）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1928123849.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>30-60k·12薪</td>\n",
       "      <td>2020年07月16日</td>\n",
       "      <td>腾讯广告TME&amp;看点广告商业化运营经理（深圳）</td>\n",
       "      <td>深圳-科技园</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1929989843.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>35</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年06月28日</td>\n",
       "      <td>33881-143 微信后台策略安全工程师(广州/深圳)</td>\n",
       "      <td>广州</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1927141573.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>36</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年06月28日</td>\n",
       "      <td>56493-23294-互娱品牌经理（深圳）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1925640491.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>37</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年06月20日</td>\n",
       "      <td>CSIG08-商业化运营产品经理（广州）</td>\n",
       "      <td>广州</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1927855477.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>38</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-30k·14薪</td>\n",
       "      <td>2020年03月09日</td>\n",
       "      <td>商业运营经理</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1926534121.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>39</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2019年10月15日</td>\n",
       "      <td>18434-高级视觉设计师（深圳）</td>\n",
       "      <td>深圳</td>\n",
       "      <td>腾讯</td>\n",
       "      <td>https://www.liepin.com/job/1923228873.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7983148/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index    edu     经验          薪水           时间  \\\n",
       "序                                                   \n",
       "0        0  本科及以上   1-3年          面议  2020年07月19日   \n",
       "1        1  本科及以上   3-5年          面议  2020年07月18日   \n",
       "2        2  本科及以上  5-10年          面议  2020年07月18日   \n",
       "3        3  本科及以上   3-5年          面议  2020年07月17日   \n",
       "4        4  本科及以上   3-5年  30-60k·12薪  2020年07月16日   \n",
       "..     ...    ...    ...         ...          ...   \n",
       "395     35  本科及以上   1-3年          面议  2020年06月28日   \n",
       "396     36  本科及以上   1-3年          面议  2020年06月28日   \n",
       "397     37  本科及以上   1-3年          面议  2020年06月20日   \n",
       "398     38  本科及以上  5-10年  20-30k·14薪  2020年03月09日   \n",
       "399     39  本科及以上  5-10年          面议  2019年10月15日   \n",
       "\n",
       "                               职称    公司地点 公司名称  \\\n",
       "序                                                \n",
       "0      30620-腾讯广告微信广告游戏行业运营经理（深圳）      深圳   腾讯   \n",
       "1      30633-腾讯广告高级视觉设计师（设计中心 深圳）      深圳   腾讯   \n",
       "2          30626-腾讯广告渠道策略分析经理（上海）      上海   腾讯   \n",
       "3    30624-腾讯广告华东食品饮料行业广告销售经理（上海）      深圳   腾讯   \n",
       "4         腾讯广告TME&看点广告商业化运营经理（深圳）  深圳-科技园   腾讯   \n",
       "..                            ...     ...  ...   \n",
       "395  33881-143 微信后台策略安全工程师(广州/深圳)      广州   腾讯   \n",
       "396        56493-23294-互娱品牌经理（深圳）      深圳   腾讯   \n",
       "397          CSIG08-商业化运营产品经理（广州）      广州   腾讯   \n",
       "398                        商业运营经理      深圳   腾讯   \n",
       "399             18434-高级视觉设计师（深圳）      深圳   腾讯   \n",
       "\n",
       "                                              链结  \\\n",
       "序                                                  \n",
       "0    https://www.liepin.com/job/1925954135.shtml   \n",
       "1    https://www.liepin.com/job/1929793323.shtml   \n",
       "2    https://www.liepin.com/job/1927617379.shtml   \n",
       "3    https://www.liepin.com/job/1928123849.shtml   \n",
       "4    https://www.liepin.com/job/1929989843.shtml   \n",
       "..                                           ...   \n",
       "395  https://www.liepin.com/job/1927141573.shtml   \n",
       "396  https://www.liepin.com/job/1925640491.shtml   \n",
       "397  https://www.liepin.com/job/1927855477.shtml   \n",
       "398  https://www.liepin.com/job/1926534121.shtml   \n",
       "399  https://www.liepin.com/job/1923228873.shtml   \n",
       "\n",
       "                                       公司URL keyword  curPage  \n",
       "序                                                              \n",
       "0    https://www.liepin.com/company/7983148/      广告        0  \n",
       "1    https://www.liepin.com/company/7983148/      广告        0  \n",
       "2    https://www.liepin.com/company/7983148/      广告        0  \n",
       "3    https://www.liepin.com/company/7983148/      广告        0  \n",
       "4    https://www.liepin.com/company/7983148/      广告        0  \n",
       "..                                       ...     ...      ...  \n",
       "395  https://www.liepin.com/company/7983148/      广告        9  \n",
       "396  https://www.liepin.com/company/7983148/      广告        9  \n",
       "397  https://www.liepin.com/company/7983148/      广告        9  \n",
       "398  https://www.liepin.com/company/7983148/      广告        9  \n",
       "399  https://www.liepin.com/company/7983148/      广告        9  \n",
       "\n",
       "[400 rows x 12 columns]"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "# Crawl every result page of one company (compIds=7983148) for each keyword\n",
    "# and stack the per-page frames into a single DataFrame.\n",
    "session = HTMLSession()\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "r = session.get(url)\n",
    "keywords = ['广告']\n",
    "list_df = list()\n",
    "\n",
    "# The pagination links are read from the unfiltered landing page `r`, so the\n",
    "# page count does not depend on the keyword: parse it once, outside the loop.\n",
    "# NOTE(review): because the count is not taken from the filtered search, some\n",
    "# of the requested pages may come back empty -- confirm this is acceptable.\n",
    "href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "df = pd.DataFrame([urlparse(x) for x in href_列表])\n",
    "df_qs = pd.DataFrame([{k: v[0] for k, v in parse_qs(x).items()} for x in df['query']])\n",
    "df_qs = df_qs.assign(curPage_int=df_qs.curPage.astype(int))  # curPage as an integer\n",
    "长度 = df_qs.curPage_int.max() + 1  # number of pages (curPage is zero-based)\n",
    "\n",
    "for key in keywords:\n",
    "    # One query-parameter payload per page number.\n",
    "    指定公司_关键词_翻页参数 = {\n",
    "        i: 翻页_参数调整(key=[key], compIds=['7983148'], curPage=[i])\n",
    "        for i in range(0, 长度)\n",
    "    }\n",
    "\n",
    "    print(key, 长度)\n",
    "    for k, v in 指定公司_关键词_翻页参数.items():\n",
    "        # requests_liepin reads the module-level name `payload` (not its\n",
    "        # `params` argument), so this assignment must stay.\n",
    "        payload = v\n",
    "        df = requests_liepin(url, params=payload)\n",
    "        # Throttle: random() is in [0, 1), so this waits 1-5 s, about 3 s on average.\n",
    "        time.sleep(1 + 4 * random())\n",
    "        # Optional per-page backup:\n",
    "        # df.to_csv(\"20春_Web数据挖掘_week04_liepin_{key}_{k}.tsv\".format(key=key, k=k), sep=\"\\t\", encoding=\"utf8\")\n",
    "\n",
    "        df = df.assign(keyword=key)  # tag rows with the search keyword\n",
    "        df = df.assign(curPage=k)  # tag rows with the page number\n",
    "\n",
    "        list_df.append(df)\n",
    "\n",
    "df_一个关键词爬取 = pd.concat(list_df).reset_index()\n",
    "df_一个关键词爬取.index.name = '序'\n",
    "df_一个关键词爬取"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 实现多个公司一个关键词搜索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "广告 7863078 10\n",
      "广告 10095329 10\n",
      "广告 7983148 10\n",
      "广告 9630160 10\n",
      "广告 1072424 10\n",
      "广告 2036768 10\n",
      "广告 12176713 10\n",
      "广告 9131484 10\n",
      "广告 9543080 10\n",
      "广告 8653060 10\n",
      "广告 8668446 10\n",
      "广告 9272548 10\n",
      "广告 12253761 10\n",
      "广告 10099719 10\n",
      "广告 4787049 10\n",
      "广告 9837971 10\n",
      "广告 591850 10\n",
      "广告 8132113 10\n",
      "广告 8628096 10\n",
      "广告 8586548 10\n",
      "广告 884492 10\n",
      "广告 6128386 10\n",
      "广告 7865459 10\n",
      "广告 7856955 10\n",
      "广告 9747025 10\n",
      "广告 5964833 10\n",
      "广告 9352611 10\n",
      "广告 8608772 10\n",
      "广告 2174886 10\n",
      "广告 8017827 10\n",
      "广告 7891846 10\n",
      "广告 2615574 10\n",
      "广告 9844505 10\n",
      "广告 8364657 10\n",
      "广告 8644766 10\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>序</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>25-50k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告数据仓库工程师 — 核心广告系统</td>\n",
       "      <td></td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>https://www.liepin.com/job/1929359509.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863078/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>信息流广告优化师</td>\n",
       "      <td>上海-闵行区</td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>https://www.liepin.com/job/1928479597.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863078/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>大客户广告销售-文化旅游行业</td>\n",
       "      <td>上海-闵行区</td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>https://www.liepin.com/job/1928479591.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863078/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>大客户广告销售-效果类广告行业</td>\n",
       "      <td>上海-闵行区</td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>https://www.liepin.com/job/1928391225.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863078/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>大客户广告销售-快消行业</td>\n",
       "      <td>上海-闵行区</td>\n",
       "      <td>字节跳动</td>\n",
       "      <td>https://www.liepin.com/job/1928391221.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7863078/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8789</th>\n",
       "      <td>16</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>15-30k·14薪</td>\n",
       "      <td>2019年12月04日</td>\n",
       "      <td>销售培训经理-北京</td>\n",
       "      <td>北京-来广营</td>\n",
       "      <td>北京五八到家信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1924916149.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8644766/</td>\n",
       "      <td>广告</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8790</th>\n",
       "      <td>17</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>15-30k·14薪</td>\n",
       "      <td>2019年12月04日</td>\n",
       "      <td>销售培训经理</td>\n",
       "      <td>北京-来广营</td>\n",
       "      <td>北京五八到家信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1924916143.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8644766/</td>\n",
       "      <td>广告</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8791</th>\n",
       "      <td>18</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>15-30k·14薪</td>\n",
       "      <td>2019年12月03日</td>\n",
       "      <td>销售培训经理</td>\n",
       "      <td>上海-和田</td>\n",
       "      <td>北京五八到家信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1925099831.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8644766/</td>\n",
       "      <td>广告</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8792</th>\n",
       "      <td>19</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>13-26k·12薪</td>\n",
       "      <td>2019年07月16日</td>\n",
       "      <td>市场经理</td>\n",
       "      <td>北京</td>\n",
       "      <td>北京五八到家信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1921598465.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8644766/</td>\n",
       "      <td>广告</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8793</th>\n",
       "      <td>20</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>15-25k·14薪</td>\n",
       "      <td>2019年05月10日</td>\n",
       "      <td>品牌经理</td>\n",
       "      <td>北京</td>\n",
       "      <td>北京五八到家信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1915563734.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8644766/</td>\n",
       "      <td>广告</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>8794 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      index    edu     经验          薪水           时间                  职称  \\\n",
       "序                                                                        \n",
       "0         0  本科及以上   经验不限  25-50k·12薪  2020年07月19日  广告数据仓库工程师 — 核心广告系统   \n",
       "1         1  本科及以上   1-3年          面议  2020年07月19日            信息流广告优化师   \n",
       "2         2  本科及以上   1-3年          面议  2020年07月19日      大客户广告销售-文化旅游行业   \n",
       "3         3  本科及以上   1-3年          面议  2020年07月19日     大客户广告销售-效果类广告行业   \n",
       "4         4  本科及以上   1-3年          面议  2020年07月19日        大客户广告销售-快消行业   \n",
       "...     ...    ...    ...         ...          ...                 ...   \n",
       "8789     16  本科及以上  5-10年  15-30k·14薪  2019年12月04日           销售培训经理-北京   \n",
       "8790     17  本科及以上  5-10年  15-30k·14薪  2019年12月04日              销售培训经理   \n",
       "8791     18  本科及以上  5-10年  15-30k·14薪  2019年12月03日              销售培训经理   \n",
       "8792     19   统招本科  5-10年  13-26k·12薪  2019年07月16日                市场经理   \n",
       "8793     20  本科及以上  5-10年  15-25k·14薪  2019年05月10日                品牌经理   \n",
       "\n",
       "        公司地点            公司名称                                           链结  \\\n",
       "序                                                                           \n",
       "0                       字节跳动  https://www.liepin.com/job/1929359509.shtml   \n",
       "1     上海-闵行区            字节跳动  https://www.liepin.com/job/1928479597.shtml   \n",
       "2     上海-闵行区            字节跳动  https://www.liepin.com/job/1928479591.shtml   \n",
       "3     上海-闵行区            字节跳动  https://www.liepin.com/job/1928391225.shtml   \n",
       "4     上海-闵行区            字节跳动  https://www.liepin.com/job/1928391221.shtml   \n",
       "...      ...             ...                                          ...   \n",
       "8789  北京-来广营  北京五八到家信息技术有限公司  https://www.liepin.com/job/1924916149.shtml   \n",
       "8790  北京-来广营  北京五八到家信息技术有限公司  https://www.liepin.com/job/1924916143.shtml   \n",
       "8791   上海-和田  北京五八到家信息技术有限公司  https://www.liepin.com/job/1925099831.shtml   \n",
       "8792      北京  北京五八到家信息技术有限公司  https://www.liepin.com/job/1921598465.shtml   \n",
       "8793      北京  北京五八到家信息技术有限公司  https://www.liepin.com/job/1915563734.shtml   \n",
       "\n",
       "                                        公司URL keyword  curPage  \n",
       "序                                                               \n",
       "0     https://www.liepin.com/company/7863078/      广告        0  \n",
       "1     https://www.liepin.com/company/7863078/      广告        0  \n",
       "2     https://www.liepin.com/company/7863078/      广告        0  \n",
       "3     https://www.liepin.com/company/7863078/      广告        0  \n",
       "4     https://www.liepin.com/company/7863078/      广告        0  \n",
       "...                                       ...     ...      ...  \n",
       "8789  https://www.liepin.com/company/8644766/      广告        3  \n",
       "8790  https://www.liepin.com/company/8644766/      广告        3  \n",
       "8791  https://www.liepin.com/company/8644766/      广告        3  \n",
       "8792  https://www.liepin.com/company/8644766/      广告        3  \n",
       "8793  https://www.liepin.com/company/8644766/      广告        3  \n",
       "\n",
       "[8794 rows x 12 columns]"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "# Crawl every result page for each company id in 公司代号字典 and each keyword,\n",
    "# then stack the per-page frames into a single DataFrame.\n",
    "session = HTMLSession()\n",
    "url = \"https://www.liepin.com/zhaopin/\"\n",
    "r = session.get(url)\n",
    "keywords = ['广告']\n",
    "list_df = list()\n",
    "\n",
    "# The pagination links are read from the unfiltered landing page `r`, so the\n",
    "# page count is identical for every company and keyword: parse it once.\n",
    "# NOTE(review): because the count is not taken from the filtered search, some\n",
    "# of the requested pages may come back empty -- confirm this is acceptable.\n",
    "href_列表 = [x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "df = pd.DataFrame([urlparse(x) for x in href_列表])\n",
    "df_qs = pd.DataFrame([{k: v[0] for k, v in parse_qs(x).items()} for x in df['query']])\n",
    "df_qs = df_qs.assign(curPage_int=df_qs.curPage.astype(int))  # curPage as an integer\n",
    "长度 = df_qs.curPage_int.max() + 1  # number of pages (curPage is zero-based)\n",
    "\n",
    "for number in 公司代号字典.values():\n",
    "    for key in keywords:\n",
    "        # One query-parameter payload per page number for this company.\n",
    "        指定公司_关键词_翻页参数 = {\n",
    "            i: 翻页_参数调整(key=[key], compIds=[number], curPage=[i])\n",
    "            for i in range(0, 长度)\n",
    "        }\n",
    "\n",
    "        print(key, number, 长度)\n",
    "        for k, v in 指定公司_关键词_翻页参数.items():\n",
    "            # requests_liepin reads the module-level name `payload` (not its\n",
    "            # `params` argument), so this assignment must stay.\n",
    "            payload = v\n",
    "            df = requests_liepin(url, params=payload)\n",
    "            # Throttle like the single-company cell: random() is in [0, 1),\n",
    "            # so this waits 1-5 s between requests instead of sending them back to back.\n",
    "            time.sleep(1 + 4 * random())\n",
    "\n",
    "            df = df.assign(keyword=key)  # tag rows with the search keyword\n",
    "            df = df.assign(curPage=k)  # tag rows with the page number\n",
    "            list_df.append(df)\n",
    "\n",
    "df_all = pd.concat(list_df).reset_index()\n",
    "df_all.index.name = '序'\n",
    "df_all"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 行业爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'互联网·游戏·软件': ['互联网/电商', '游戏产业', '计算机软件', 'IT服务'],\n",
       " '电子·通信·硬件': ['电子/芯片/半导体', '通信业', '计算机/网络设备'],\n",
       " '房地产·建筑·物业': ['房地产/建筑', '规划/设计/装潢', '房地产服务'],\n",
       " '金融': ['银行', '保险', '基金/证券/投资', '会计/审计', '信托/担保/拍卖'],\n",
       " '消费品': ['快消品', '批发零售', '服装纺织', '家具/家电', '办公设备', '奢侈品/收藏品', '珠宝/玩具/工艺品'],\n",
       " '汽车·机械·制造': ['汽车/摩托车', '机械/机电/重工', '印刷/包装/造纸', '原材料加工', '仪器/电气/自动化'],\n",
       " '制药·医疗': ['制药/生物工程', '医疗/保健/美容', '医疗器械'],\n",
       " '能源·化工·环保': ['能源/水利', '石油/化工', '采掘/冶炼/矿产', '环保', '新能源'],\n",
       " '服务·外包·中介': ['专业服务', '中介服务', '外包服务', '检测/认证', '餐饮/酒旅/服务', '文体娱乐', '租赁服务'],\n",
       " '广告·传媒·教育·文化': ['广告/市场/会展', '影视文化', '教育培训'],\n",
       " '交通·贸易·物流': ['交通/物流/运输', '贸易/进出口', '航空/航天'],\n",
       " '政府·农林牧渔': ['政务/公共服务', '农林牧渔', '其他行业']}"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "{'互联网/电商': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_01&jobKind=&industries=040&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '游戏产业': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_01&jobKind=&industries=420&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '计算机软件': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_01&jobKind=&industries=010&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " 'IT服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_01&jobKind=&industries=030&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '电子/芯片/半导体': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_02&jobKind=&industries=050&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '通信业': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_02&jobKind=&industries=060&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '计算机/网络设备': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_02&jobKind=&industries=020&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '房地产/建筑': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_03&jobKind=&industries=080&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '规划/设计/装潢': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_03&jobKind=&industries=100&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '房地产服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_03&jobKind=&industries=090&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '银行': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_04&jobKind=&industries=130&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '保险': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_04&jobKind=&industries=140&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '基金/证券/投资': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_04&jobKind=&industries=150&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '会计/审计': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_04&jobKind=&industries=430&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '信托/担保/拍卖': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_04&jobKind=&industries=500&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '快消品': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=190&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '批发零售': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=240&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '服装纺织': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=200&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '家具/家电': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=210&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '办公设备': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=220&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '奢侈品/收藏品': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=460&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '珠宝/玩具/工艺品': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_05&jobKind=&industries=470&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '汽车/摩托车': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_06&jobKind=&industries=350&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '机械/机电/重工': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_06&jobKind=&industries=360&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '印刷/包装/造纸': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_06&jobKind=&industries=180&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '原材料加工': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_06&jobKind=&industries=370&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '仪器/电气/自动化': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_06&jobKind=&industries=340&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '制药/生物工程': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_10&jobKind=&industries=270&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '医疗/保健/美容': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_10&jobKind=&industries=280&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '医疗器械': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_10&jobKind=&industries=290&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '能源/水利': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_11&jobKind=&industries=330&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '石油/化工': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_11&jobKind=&industries=310&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '采掘/冶炼/矿产': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_11&jobKind=&industries=320&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '环保': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_11&jobKind=&industries=300&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '新能源': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_11&jobKind=&industries=490&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '专业服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=120&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '中介服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=110&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '外包服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=440&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '检测/认证': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=450&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '餐饮/酒旅/服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=230&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '文体娱乐': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=260&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '租赁服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_07&jobKind=&industries=510&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '广告/市场/会展': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_08&jobKind=&industries=070&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '影视文化': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_08&jobKind=&industries=170&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '教育培训': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_08&jobKind=&industries=380&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '交通/物流/运输': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_09&jobKind=&industries=250&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '贸易/进出口': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_09&jobKind=&industries=160&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '航空/航天': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_09&jobKind=&industries=480&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '政务/公共服务': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_12&jobKind=&industries=390&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '农林牧渔': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_12&jobKind=&industries=410&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5',\n",
       " '其他行业': '/zhaopin/?flushckid=1&compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=&compIds=&subIndustry=&industryType=industry_12&jobKind=&industries=400&compscale=&key=&siTag=1B2M2Y8AsgTpgAmY7PhCfg%7EfA9rXquZc5IkJpXC-Ycixw&d_sfrom=search_unknown&d_ckId=867d9bd186363c4315e015c0f5a011e5&d_curPage=0&d_pageSize=40&d_headId=867d9bd186363c4315e015c0f5a011e5'}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url=\"https://www.liepin.com/zhaopin/?industries=&subIndustry=&dqs=&salary=&jobKind=&pubTime=&compkind=&compscale=&searchType=1&isAnalysis=&sortFlag=15&d_headId=591ebc1760fee953b72353bace17e6cc&d_ckId=591ebc1760fee953b72353bace17e6cc&d_sfrom=search_prime&d_curPage=0&d_pageSize=40&siTag=bFGQTbwE_AAQSb-u11jrBw~fA9rXquZc5IkJpXC-Ycixw&key=%E5%B9%BF%E5%91%8A\"\n",
    "行业大类=r.html.xpath(\"//dd[contains(@class,'select-industry')]/ul[@class='clearfix']/li\")\n",
    "# print(行业大类)\n",
    "行业大类url=r.html.xpath(\"//li/div[@class='sub-industry']/a\")\n",
    "行业大类url\n",
    "e={a.xpath(\"a/text()\")[0]:a.xpath(\"a/@href\")[0] for a in 行业大类url}\n",
    "e\n",
    "行业={x.xpath(\"//li/span/text()\")[0]: x.xpath(\"//li/div[@class='sub-industry']/a/text()\")for x in 行业大类}\n",
    "display(行业)\n",
    "# ef=pd.DataFrame(行业)\n",
    "# ef\n",
    "# 行业大类列表=[x.xpath(\"//li/span/text()\")[0] for x in 行业大类]\n",
    "# print(行业大类列表)\n",
    "行业细分=r.html.xpath(\"//div[@class='sub-industry']/a\")\n",
    "# #行业细分\n",
    "行业列表=[a.xpath(\"a/@href\")[0] for a in 行业细分]\n",
    "行业列表\n",
    "行业字典={a.xpath(\"a/text()\")[0]:a.xpath(\"a/@href\")[0] for a in 行业细分}\n",
    "行业字典\n",
    "#ef=行业字典.T#.rename(columns={0:url})\n",
    "\n",
    "#ef\n",
    "# 行业名称=[a.xpath(\"a/text()\")[0]for a in 行业细分]#pd.Series()\n",
    "# 行业名称"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>scheme</th>\n",
       "      <th>netloc</th>\n",
       "      <th>path</th>\n",
       "      <th>params</th>\n",
       "      <th>query</th>\n",
       "      <th>fragment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td></td>\n",
       "      <td></td>\n",
       "      <td>/zhaopin/</td>\n",
       "      <td></td>\n",
       "      <td>flushckid=1&amp;compkind=&amp;dqs=&amp;pubTime=&amp;pageSize=4...</td>\n",
       "      <td></td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   scheme netloc       path params  \\\n",
       "0                 /zhaopin/          \n",
       "1                 /zhaopin/          \n",
       "2                 /zhaopin/          \n",
       "3                 /zhaopin/          \n",
       "4                 /zhaopin/          \n",
       "5                 /zhaopin/          \n",
       "6                 /zhaopin/          \n",
       "7                 /zhaopin/          \n",
       "8                 /zhaopin/          \n",
       "9                 /zhaopin/          \n",
       "10                /zhaopin/          \n",
       "11                /zhaopin/          \n",
       "12                /zhaopin/          \n",
       "13                /zhaopin/          \n",
       "14                /zhaopin/          \n",
       "15                /zhaopin/          \n",
       "16                /zhaopin/          \n",
       "17                /zhaopin/          \n",
       "18                /zhaopin/          \n",
       "19                /zhaopin/          \n",
       "20                /zhaopin/          \n",
       "21                /zhaopin/          \n",
       "22                /zhaopin/          \n",
       "23                /zhaopin/          \n",
       "24                /zhaopin/          \n",
       "25                /zhaopin/          \n",
       "26                /zhaopin/          \n",
       "27                /zhaopin/          \n",
       "28                /zhaopin/          \n",
       "29                /zhaopin/          \n",
       "30                /zhaopin/          \n",
       "31                /zhaopin/          \n",
       "32                /zhaopin/          \n",
       "33                /zhaopin/          \n",
       "34                /zhaopin/          \n",
       "35                /zhaopin/          \n",
       "36                /zhaopin/          \n",
       "37                /zhaopin/          \n",
       "38                /zhaopin/          \n",
       "39                /zhaopin/          \n",
       "40                /zhaopin/          \n",
       "41                /zhaopin/          \n",
       "42                /zhaopin/          \n",
       "43                /zhaopin/          \n",
       "44                /zhaopin/          \n",
       "45                /zhaopin/          \n",
       "46                /zhaopin/          \n",
       "47                /zhaopin/          \n",
       "48                /zhaopin/          \n",
       "49                /zhaopin/          \n",
       "50                /zhaopin/          \n",
       "\n",
       "                                                query fragment  \n",
       "0   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "1   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "2   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "3   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "4   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "5   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "6   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "7   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "8   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "9   flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "10  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "11  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "12  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "13  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "14  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "15  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "16  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "17  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "18  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "19  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "20  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "21  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "22  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "23  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "24  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "25  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "26  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "27  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "28  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "29  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "30  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "31  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "32  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "33  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "34  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "35  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "36  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "37  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "38  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "39  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "40  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "41  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "42  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "43  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "44  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "45  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "46  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "47  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "48  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "49  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           \n",
       "50  flushckid=1&compkind=&dqs=&pubTime=&pageSize=4...           "
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>flushckid</th>\n",
       "      <th>pageSize</th>\n",
       "      <th>industryType</th>\n",
       "      <th>industries</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>040</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>420</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>010</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_01</td>\n",
       "      <td>030</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>050</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>060</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_02</td>\n",
       "      <td>020</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>080</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>100</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_03</td>\n",
       "      <td>090</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>130</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>140</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>150</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>430</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_04</td>\n",
       "      <td>500</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>190</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>240</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>200</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>210</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>220</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>460</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_05</td>\n",
       "      <td>470</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>350</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>360</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>180</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>370</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_06</td>\n",
       "      <td>340</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>270</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>280</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_10</td>\n",
       "      <td>290</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>330</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>310</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>32</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>320</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>300</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_11</td>\n",
       "      <td>490</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>120</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>110</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>440</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>38</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>450</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>230</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>260</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_07</td>\n",
       "      <td>510</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>070</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>170</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_08</td>\n",
       "      <td>380</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>250</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>160</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_09</td>\n",
       "      <td>480</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>390</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>410</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>1</td>\n",
       "      <td>40</td>\n",
       "      <td>industry_12</td>\n",
       "      <td>400</td>\n",
       "      <td>1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw</td>\n",
       "      <td>search_unknown</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>867d9bd186363c4315e015c0f5a011e5</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   flushckid pageSize industryType industries  \\\n",
       "0          1       40  industry_01        040   \n",
       "1          1       40  industry_01        420   \n",
       "2          1       40  industry_01        010   \n",
       "3          1       40  industry_01        030   \n",
       "4          1       40  industry_02        050   \n",
       "5          1       40  industry_02        060   \n",
       "6          1       40  industry_02        020   \n",
       "7          1       40  industry_03        080   \n",
       "8          1       40  industry_03        100   \n",
       "9          1       40  industry_03        090   \n",
       "10         1       40  industry_04        130   \n",
       "11         1       40  industry_04        140   \n",
       "12         1       40  industry_04        150   \n",
       "13         1       40  industry_04        430   \n",
       "14         1       40  industry_04        500   \n",
       "15         1       40  industry_05        190   \n",
       "16         1       40  industry_05        240   \n",
       "17         1       40  industry_05        200   \n",
       "18         1       40  industry_05        210   \n",
       "19         1       40  industry_05        220   \n",
       "20         1       40  industry_05        460   \n",
       "21         1       40  industry_05        470   \n",
       "22         1       40  industry_06        350   \n",
       "23         1       40  industry_06        360   \n",
       "24         1       40  industry_06        180   \n",
       "25         1       40  industry_06        370   \n",
       "26         1       40  industry_06        340   \n",
       "27         1       40  industry_10        270   \n",
       "28         1       40  industry_10        280   \n",
       "29         1       40  industry_10        290   \n",
       "30         1       40  industry_11        330   \n",
       "31         1       40  industry_11        310   \n",
       "32         1       40  industry_11        320   \n",
       "33         1       40  industry_11        300   \n",
       "34         1       40  industry_11        490   \n",
       "35         1       40  industry_07        120   \n",
       "36         1       40  industry_07        110   \n",
       "37         1       40  industry_07        440   \n",
       "38         1       40  industry_07        450   \n",
       "39         1       40  industry_07        230   \n",
       "40         1       40  industry_07        260   \n",
       "41         1       40  industry_07        510   \n",
       "42         1       40  industry_08        070   \n",
       "43         1       40  industry_08        170   \n",
       "44         1       40  industry_08        380   \n",
       "45         1       40  industry_09        250   \n",
       "46         1       40  industry_09        160   \n",
       "47         1       40  industry_09        480   \n",
       "48         1       40  industry_12        390   \n",
       "49         1       40  industry_12        410   \n",
       "50         1       40  industry_12        400   \n",
       "\n",
       "                                            siTag         d_sfrom  \\\n",
       "0   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "1   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "2   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "3   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "4   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "5   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "6   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "7   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "8   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "9   1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "10  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "11  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "12  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "13  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "14  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "15  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "16  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "17  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "18  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "19  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "20  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "21  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "22  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "23  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "24  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "25  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "26  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "27  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "28  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "29  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "30  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "31  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "32  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "33  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "34  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "35  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "36  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "37  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "38  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "39  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "40  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "41  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "42  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "43  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "44  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "45  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "46  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "47  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "48  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "49  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "50  1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw  search_unknown   \n",
       "\n",
       "                              d_ckId d_curPage d_pageSize  \\\n",
       "0   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "1   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "2   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "3   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "4   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "5   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "6   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "7   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "8   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "9   867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "10  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "11  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "12  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "13  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "14  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "15  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "16  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "17  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "18  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "19  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "20  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "21  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "22  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "23  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "24  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "25  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "26  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "27  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "28  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "29  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "30  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "31  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "32  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "33  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "34  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "35  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "36  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "37  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "38  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "39  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "40  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "41  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "42  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "43  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "44  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "45  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "46  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "47  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "48  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "49  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "50  867d9bd186363c4315e015c0f5a011e5         0         40   \n",
       "\n",
       "                            d_headId  \n",
       "0   867d9bd186363c4315e015c0f5a011e5  \n",
       "1   867d9bd186363c4315e015c0f5a011e5  \n",
       "2   867d9bd186363c4315e015c0f5a011e5  \n",
       "3   867d9bd186363c4315e015c0f5a011e5  \n",
       "4   867d9bd186363c4315e015c0f5a011e5  \n",
       "5   867d9bd186363c4315e015c0f5a011e5  \n",
       "6   867d9bd186363c4315e015c0f5a011e5  \n",
       "7   867d9bd186363c4315e015c0f5a011e5  \n",
       "8   867d9bd186363c4315e015c0f5a011e5  \n",
       "9   867d9bd186363c4315e015c0f5a011e5  \n",
       "10  867d9bd186363c4315e015c0f5a011e5  \n",
       "11  867d9bd186363c4315e015c0f5a011e5  \n",
       "12  867d9bd186363c4315e015c0f5a011e5  \n",
       "13  867d9bd186363c4315e015c0f5a011e5  \n",
       "14  867d9bd186363c4315e015c0f5a011e5  \n",
       "15  867d9bd186363c4315e015c0f5a011e5  \n",
       "16  867d9bd186363c4315e015c0f5a011e5  \n",
       "17  867d9bd186363c4315e015c0f5a011e5  \n",
       "18  867d9bd186363c4315e015c0f5a011e5  \n",
       "19  867d9bd186363c4315e015c0f5a011e5  \n",
       "20  867d9bd186363c4315e015c0f5a011e5  \n",
       "21  867d9bd186363c4315e015c0f5a011e5  \n",
       "22  867d9bd186363c4315e015c0f5a011e5  \n",
       "23  867d9bd186363c4315e015c0f5a011e5  \n",
       "24  867d9bd186363c4315e015c0f5a011e5  \n",
       "25  867d9bd186363c4315e015c0f5a011e5  \n",
       "26  867d9bd186363c4315e015c0f5a011e5  \n",
       "27  867d9bd186363c4315e015c0f5a011e5  \n",
       "28  867d9bd186363c4315e015c0f5a011e5  \n",
       "29  867d9bd186363c4315e015c0f5a011e5  \n",
       "30  867d9bd186363c4315e015c0f5a011e5  \n",
       "31  867d9bd186363c4315e015c0f5a011e5  \n",
       "32  867d9bd186363c4315e015c0f5a011e5  \n",
       "33  867d9bd186363c4315e015c0f5a011e5  \n",
       "34  867d9bd186363c4315e015c0f5a011e5  \n",
       "35  867d9bd186363c4315e015c0f5a011e5  \n",
       "36  867d9bd186363c4315e015c0f5a011e5  \n",
       "37  867d9bd186363c4315e015c0f5a011e5  \n",
       "38  867d9bd186363c4315e015c0f5a011e5  \n",
       "39  867d9bd186363c4315e015c0f5a011e5  \n",
       "40  867d9bd186363c4315e015c0f5a011e5  \n",
       "41  867d9bd186363c4315e015c0f5a011e5  \n",
       "42  867d9bd186363c4315e015c0f5a011e5  \n",
       "43  867d9bd186363c4315e015c0f5a011e5  \n",
       "44  867d9bd186363c4315e015c0f5a011e5  \n",
       "45  867d9bd186363c4315e015c0f5a011e5  \n",
       "46  867d9bd186363c4315e015c0f5a011e5  \n",
       "47  867d9bd186363c4315e015c0f5a011e5  \n",
       "48  867d9bd186363c4315e015c0f5a011e5  \n",
       "49  867d9bd186363c4315e015c0f5a011e5  \n",
       "50  867d9bd186363c4315e015c0f5a011e5  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "from urllib.parse import urlparse, parse_qs\n",
    "# 参数url=[urlparse(a) for a in 行业字典.values()]\n",
    "# 参数url\n",
    "df =pd.DataFrame([urlparse(a) for a in 行业列表])\n",
    "display(df)\n",
    "df_qs = pd.DataFrame([{k:v[0] for k,v in parse_qs(x).items()}   for x in df['query']])\n",
    "df_qs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'flushckid': ['1'],\n",
       " 'pageSize': ['40'],\n",
       " 'industryType': ['industry_01'],\n",
       " 'industries': ['040'],\n",
       " 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       " 'd_sfrom': ['search_unknown'],\n",
       " 'd_ckId': ['867d9bd186363c4315e015c0f5a011e5'],\n",
       " 'd_curPage': ['0'],\n",
       " 'd_pageSize': ['40'],\n",
       " 'd_headId': ['867d9bd186363c4315e015c0f5a011e5']}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def 行业参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "行业参数=行业参数拆解(行业列表[0])\n",
    "行业参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>行业代号</th>\n",
       "      <th>行业大类代号</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>互联网/电商</th>\n",
       "      <td>040</td>\n",
       "      <td>industry_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>游戏产业</th>\n",
       "      <td>420</td>\n",
       "      <td>industry_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>计算机软件</th>\n",
       "      <td>010</td>\n",
       "      <td>industry_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>IT服务</th>\n",
       "      <td>030</td>\n",
       "      <td>industry_01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>电子/芯片/半导体</th>\n",
       "      <td>050</td>\n",
       "      <td>industry_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>通信业</th>\n",
       "      <td>060</td>\n",
       "      <td>industry_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>计算机/网络设备</th>\n",
       "      <td>020</td>\n",
       "      <td>industry_02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>房地产/建筑</th>\n",
       "      <td>080</td>\n",
       "      <td>industry_03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>规划/设计/装潢</th>\n",
       "      <td>100</td>\n",
       "      <td>industry_03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>房地产服务</th>\n",
       "      <td>090</td>\n",
       "      <td>industry_03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>银行</th>\n",
       "      <td>130</td>\n",
       "      <td>industry_04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>保险</th>\n",
       "      <td>140</td>\n",
       "      <td>industry_04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>基金/证券/投资</th>\n",
       "      <td>150</td>\n",
       "      <td>industry_04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>会计/审计</th>\n",
       "      <td>430</td>\n",
       "      <td>industry_04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>信托/担保/拍卖</th>\n",
       "      <td>500</td>\n",
       "      <td>industry_04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>快消品</th>\n",
       "      <td>190</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>批发零售</th>\n",
       "      <td>240</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>服装纺织</th>\n",
       "      <td>200</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>家具/家电</th>\n",
       "      <td>210</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>办公设备</th>\n",
       "      <td>220</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>奢侈品/收藏品</th>\n",
       "      <td>460</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>珠宝/玩具/工艺品</th>\n",
       "      <td>470</td>\n",
       "      <td>industry_05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>汽车/摩托车</th>\n",
       "      <td>350</td>\n",
       "      <td>industry_06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>机械/机电/重工</th>\n",
       "      <td>360</td>\n",
       "      <td>industry_06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>印刷/包装/造纸</th>\n",
       "      <td>180</td>\n",
       "      <td>industry_06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>原材料加工</th>\n",
       "      <td>370</td>\n",
       "      <td>industry_06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>仪器/电气/自动化</th>\n",
       "      <td>340</td>\n",
       "      <td>industry_06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>制药/生物工程</th>\n",
       "      <td>270</td>\n",
       "      <td>industry_10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>医疗/保健/美容</th>\n",
       "      <td>280</td>\n",
       "      <td>industry_10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>医疗器械</th>\n",
       "      <td>290</td>\n",
       "      <td>industry_10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>能源/水利</th>\n",
       "      <td>330</td>\n",
       "      <td>industry_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>石油/化工</th>\n",
       "      <td>310</td>\n",
       "      <td>industry_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>采掘/冶炼/矿产</th>\n",
       "      <td>320</td>\n",
       "      <td>industry_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>环保</th>\n",
       "      <td>300</td>\n",
       "      <td>industry_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>新能源</th>\n",
       "      <td>490</td>\n",
       "      <td>industry_11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>专业服务</th>\n",
       "      <td>120</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>中介服务</th>\n",
       "      <td>110</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>外包服务</th>\n",
       "      <td>440</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>检测/认证</th>\n",
       "      <td>450</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>餐饮/酒旅/服务</th>\n",
       "      <td>230</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>文体娱乐</th>\n",
       "      <td>260</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>租赁服务</th>\n",
       "      <td>510</td>\n",
       "      <td>industry_07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>广告/市场/会展</th>\n",
       "      <td>070</td>\n",
       "      <td>industry_08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>影视文化</th>\n",
       "      <td>170</td>\n",
       "      <td>industry_08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>教育培训</th>\n",
       "      <td>380</td>\n",
       "      <td>industry_08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>交通/物流/运输</th>\n",
       "      <td>250</td>\n",
       "      <td>industry_09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>贸易/进出口</th>\n",
       "      <td>160</td>\n",
       "      <td>industry_09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>航空/航天</th>\n",
       "      <td>480</td>\n",
       "      <td>industry_09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>政务/公共服务</th>\n",
       "      <td>390</td>\n",
       "      <td>industry_12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>农林牧渔</th>\n",
       "      <td>410</td>\n",
       "      <td>industry_12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>其他行业</th>\n",
       "      <td>400</td>\n",
       "      <td>industry_12</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          行业代号       行业大类代号\n",
       "互联网/电商     040  industry_01\n",
       "游戏产业       420  industry_01\n",
       "计算机软件      010  industry_01\n",
       "IT服务       030  industry_01\n",
       "电子/芯片/半导体  050  industry_02\n",
       "通信业        060  industry_02\n",
       "计算机/网络设备   020  industry_02\n",
       "房地产/建筑     080  industry_03\n",
       "规划/设计/装潢   100  industry_03\n",
       "房地产服务      090  industry_03\n",
       "银行         130  industry_04\n",
       "保险         140  industry_04\n",
       "基金/证券/投资   150  industry_04\n",
       "会计/审计      430  industry_04\n",
       "信托/担保/拍卖   500  industry_04\n",
       "快消品        190  industry_05\n",
       "批发零售       240  industry_05\n",
       "服装纺织       200  industry_05\n",
       "家具/家电      210  industry_05\n",
       "办公设备       220  industry_05\n",
       "奢侈品/收藏品    460  industry_05\n",
       "珠宝/玩具/工艺品  470  industry_05\n",
       "汽车/摩托车     350  industry_06\n",
       "机械/机电/重工   360  industry_06\n",
       "印刷/包装/造纸   180  industry_06\n",
       "原材料加工      370  industry_06\n",
       "仪器/电气/自动化  340  industry_06\n",
       "制药/生物工程    270  industry_10\n",
       "医疗/保健/美容   280  industry_10\n",
       "医疗器械       290  industry_10\n",
       "能源/水利      330  industry_11\n",
       "石油/化工      310  industry_11\n",
       "采掘/冶炼/矿产   320  industry_11\n",
       "环保         300  industry_11\n",
       "新能源        490  industry_11\n",
       "专业服务       120  industry_07\n",
       "中介服务       110  industry_07\n",
       "外包服务       440  industry_07\n",
       "检测/认证      450  industry_07\n",
       "餐饮/酒旅/服务   230  industry_07\n",
       "文体娱乐       260  industry_07\n",
       "租赁服务       510  industry_07\n",
       "广告/市场/会展   070  industry_08\n",
       "影视文化       170  industry_08\n",
       "教育培训       380  industry_08\n",
       "交通/物流/运输   250  industry_09\n",
       "贸易/进出口     160  industry_09\n",
       "航空/航天      480  industry_09\n",
       "政务/公共服务    390  industry_12\n",
       "农林牧渔       410  industry_12\n",
       "其他行业       400  industry_12"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "行业分类代号={k:行业参数拆解(v)['industryType']  for k,v in 行业字典.items()}\n",
    "行业分类代号\n",
    "行业代号拆解={k:行业参数拆解(v)['industries']  for k,v in 行业字典.items()}\n",
    "行业代号拆解\n",
    "行业分类代号={k:行业参数拆解(v)['industryType']  for k,v in 行业字典.items()}\n",
    "行业分类代号\n",
    "行业分类代号拆解=pd.DataFrame({k:行业参数拆解(v)['industryType']  for k,v in 行业字典.items()})\n",
    "cf=行业分类代号拆解.T.rename(columns={0:\"行业大类代号\"})\n",
    "cf\n",
    "ef=pd.DataFrame(行业代号拆解)\n",
    "hf=ef.T.rename(columns={0:\"行业代号\"})\n",
    "hf=hf.assign(行业大类代号=cf['行业大类代号'])\n",
    "hf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'flushckid': ['1'],\n",
       " 'pageSize': ['40'],\n",
       " 'industryType': ['industry_09'],\n",
       " 'industries': ['160'],\n",
       " 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       " 'd_sfrom': ['search_unknown'],\n",
       " 'd_ckId': ['867d9bd186363c4315e015c0f5a011e5'],\n",
       " 'd_curPage': ['0'],\n",
       " 'd_pageSize': ['40'],\n",
       " 'd_headId': ['867d9bd186363c4315e015c0f5a011e5'],\n",
       " 'key': ['广告']}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def 行业参数调整(industryType,industries,key):\n",
    "    参数=行业参数.copy()\n",
    "    参数['industryType']=industryType\n",
    "    参数['industries']=industries\n",
    "    参数['key']=key\n",
    "    return 参数\n",
    "#k:行业参数调整(industryType=['industry_09'],industries=[],\n",
    "行业_keyword=行业参数调整(industryType=['industry_09'],industries=['160'],key=['广告'])\n",
    "行业_keyword"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'flushckid': ['1'],\n",
       " 'pageSize': ['40'],\n",
       " 'industryType': ['industry_09'],\n",
       " 'industries': ['160'],\n",
       " 'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       " 'd_sfrom': ['search_unknown'],\n",
       " 'd_ckId': ['867d9bd186363c4315e015c0f5a011e5'],\n",
       " 'd_curPage': ['0'],\n",
       " 'd_pageSize': ['40'],\n",
       " 'd_headId': ['867d9bd186363c4315e015c0f5a011e5'],\n",
       " 'key': ['广告']}"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def 行业参数调整(industryType,industries,key):\n",
    "    参数=行业参数.copy()\n",
    "    参数['industryType']=industryType\n",
    "    参数['industries']=industries\n",
    "    参数['key']=key\n",
    "    return 参数\n",
    "#k:行业参数调整(industryType=['industry_09'],industries=[],\n",
    "行业_keyword=行业参数调整(industryType=['industry_09'],industries=['160'],key=['广告'])\n",
    "行业_keyword"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 指定行业类别（广告·传媒·艺术·文化——广告/市场/会展）关键词搜索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [0]},\n",
       " 1: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [1]},\n",
       " 2: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [2]},\n",
       " 3: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [3]},\n",
       " 4: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [4]},\n",
       " 5: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [5]},\n",
       " 6: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [6]},\n",
       " 7: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [7]},\n",
       " 8: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [8]},\n",
       " 9: {'pageSize': ['40'],\n",
       "  'sortFlag': ['°radeFlag=0'],\n",
       "  'siTag': ['1B2M2Y8AsgTpgAmY7PhCfg~fA9rXquZc5IkJpXC-Ycixw'],\n",
       "  'd_sfrom': ['search_unknown'],\n",
       "  'd_ckId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['9c54104fe09246215348e6dca5279583'],\n",
       "  'curPage': ['1'],\n",
       "  'industryType': ['industry_08'],\n",
       "  'industries': ['070'],\n",
       "  'key': ['广告'],\n",
       "  'curPage_int': [9]}}"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "行业url =\"https://www.liepin.com/zhaopin/?industries=070&subIndustry=&dqs=&salary=&jobKind=&pubTime=&compkind=&compscale=&searchType=1&isAnalysis=&sortFlag=15&d_headId=591ebc1760fee953b72353bace17e6cc&d_ckId=88f95b2f7dcac1d2762df7388ef590a7&d_sfrom=search_prime&d_curPage=0&d_pageSize=40&siTag=bFGQTbwE_AAQSb-u11jrBw%7EJXvpnxg7eZ6GFs7p-osX2Q&key=%E5%B9%BF%E5%91%8A\"\n",
    "r=session.get(url)\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "翻页url={x.text:x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)}\n",
    "翻页url\n",
    "城市列表=[x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "df=pd.DataFrame([urlparse(x) for x in 城市列表 ])\n",
    "df_qs=pd.DataFrame([{ k:v[0] for k,v in parse_qs(x).items()}for x in df['query']])\n",
    "df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) \n",
    "df_qs\n",
    "长度=int(df_qs.curPage_int.max()+1)\n",
    "def 翻页_参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "\n",
    "# Template query parameters, parsed from the first pager link collected in 城市列表.\n",
    "翻页_参数模板=翻页_参数拆解(城市列表[0])\n",
    "\n",
    "def 行业翻页(industryType,industries,key,curPage_int):\n",
    "    \"\"\"Return a new dict: 翻页_参数模板 with the four given entries set or overridden.\n",
    "\n",
    "    Each argument is stored as-is under the key of the same name; the template\n",
    "    itself is not modified.\n",
    "\n",
    "    NOTE(review): only 'curPage_int' is written here; any 'curPage' entry already in\n",
    "    the template is carried over unchanged -- confirm which key the site pages on.\n",
    "    \"\"\"\n",
    "    overrides={\n",
    "        'industryType': industryType,\n",
    "        'industries': industries,\n",
    "        'key': key,\n",
    "        'curPage_int': curPage_int,\n",
    "    }\n",
    "    # Template entries first, then the overrides, so existing keys keep their position.\n",
    "    return {**翻页_参数模板, **overrides}\n",
    "# One parameter dict per page index 0..长度-1, keyed by that index.\n",
    "翻页_参数={\n",
    "    page: 行业翻页(industryType=['industry_08'],industries=['070'],key=['广告'],curPage_int=[page])\n",
    "    for page in range(长度)\n",
    "}\n",
    "翻页_参数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "广告 10\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>序</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>15-30k·12薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>广告销售总监</td>\n",
       "      <td></td>\n",
       "      <td>深圳英派科特广告传媒有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1928727381.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8945895/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>10-20k·12薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>广告视频策划</td>\n",
       "      <td></td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1928662937.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>7-14k·12薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>电商广告销售顾问</td>\n",
       "      <td></td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1928662935.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>8-12k·12薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>信息流广告优化师</td>\n",
       "      <td></td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1928662927.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>8-15k·12薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>电商广告优化师</td>\n",
       "      <td></td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1928662917.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>35</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>广告面销高级经理</td>\n",
       "      <td>泉州</td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1927470139.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>36</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>电商广告销售顾问</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1927467127.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>37</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>10-15k·15薪</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>广告销售经理</td>\n",
       "      <td>广州-珠江新城</td>\n",
       "      <td>深圳英派科特广告传媒有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1927461503.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8945895/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>38</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>广告增值经理</td>\n",
       "      <td>深圳-南山区</td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1927382321.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>39</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>面议</td>\n",
       "      <td>2020年07月18日</td>\n",
       "      <td>电商广告面销顾问</td>\n",
       "      <td>广州-海珠区</td>\n",
       "      <td>今日头条</td>\n",
       "      <td>https://www.liepin.com/job/1927368337.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9630160/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index    edu     经验          薪水           时间        职称     公司地点  \\\n",
       "序                                                                      \n",
       "0        0  大专及以上  5-10年  15-30k·12薪  2020年07月18日    广告销售总监            \n",
       "1        1  本科及以上   经验不限  10-20k·12薪  2020年07月18日    广告视频策划            \n",
       "2        2  本科及以上   经验不限   7-14k·12薪  2020年07月18日  电商广告销售顾问            \n",
       "3        3   统招本科   经验不限   8-12k·12薪  2020年07月18日  信息流广告优化师            \n",
       "4        4   统招本科   经验不限   8-15k·12薪  2020年07月18日   电商广告优化师            \n",
       "..     ...    ...    ...         ...          ...       ...      ...   \n",
       "395     35  大专及以上  5-10年          面议  2020年07月18日  广告面销高级经理       泉州   \n",
       "396     36  大专及以上   1-3年          面议  2020年07月18日  电商广告销售顾问   广州-海珠区   \n",
       "397     37  大专及以上   1-3年  10-15k·15薪  2020年07月18日    广告销售经理  广州-珠江新城   \n",
       "398     38  本科及以上   3-5年          面议  2020年07月18日    广告增值经理   深圳-南山区   \n",
       "399     39  大专及以上   1-3年          面议  2020年07月18日  电商广告面销顾问   广州-海珠区   \n",
       "\n",
       "               公司名称                                           链结  \\\n",
       "序                                                                  \n",
       "0    深圳英派科特广告传媒有限公司  https://www.liepin.com/job/1928727381.shtml   \n",
       "1              今日头条  https://www.liepin.com/job/1928662937.shtml   \n",
       "2              今日头条  https://www.liepin.com/job/1928662935.shtml   \n",
       "3              今日头条  https://www.liepin.com/job/1928662927.shtml   \n",
       "4              今日头条  https://www.liepin.com/job/1928662917.shtml   \n",
       "..              ...                                          ...   \n",
       "395            今日头条  https://www.liepin.com/job/1927470139.shtml   \n",
       "396            今日头条  https://www.liepin.com/job/1927467127.shtml   \n",
       "397  深圳英派科特广告传媒有限公司  https://www.liepin.com/job/1927461503.shtml   \n",
       "398            今日头条  https://www.liepin.com/job/1927382321.shtml   \n",
       "399            今日头条  https://www.liepin.com/job/1927368337.shtml   \n",
       "\n",
       "                                       公司URL keyword  curPage  \n",
       "序                                                              \n",
       "0    https://www.liepin.com/company/8945895/      广告        0  \n",
       "1    https://www.liepin.com/company/9630160/      广告        0  \n",
       "2    https://www.liepin.com/company/9630160/      广告        0  \n",
       "3    https://www.liepin.com/company/9630160/      广告        0  \n",
       "4    https://www.liepin.com/company/9630160/      广告        0  \n",
       "..                                       ...     ...      ...  \n",
       "395  https://www.liepin.com/company/9630160/      广告        9  \n",
       "396  https://www.liepin.com/company/9630160/      广告        9  \n",
       "397  https://www.liepin.com/company/8945895/      广告        9  \n",
       "398  https://www.liepin.com/company/9630160/      广告        9  \n",
       "399  https://www.liepin.com/company/9630160/      广告        9  \n",
       "\n",
       "[400 rows x 12 columns]"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession\n",
    "\n",
    "# Start from a fresh session and request the search landing page once before crawling.\n",
    "session =HTMLSession()\n",
    "url=\"https://www.liepin.com/zhaopin/\"\n",
    "r=session.get(url)\n",
    "\n",
    "keywords = ['广告']\n",
    "list_df=list()\n",
    "for key in keywords:\n",
    "    # One parameter dict per page index for this keyword (industry filters are fixed).\n",
    "    翻页_参数={i:行业翻页(industryType=['industry_08'],industries=['070'],key=[key],curPage_int = [i]  )\n",
    "               for i in  range(0,长度)\n",
    "      }\n",
    "    print (key,长度)\n",
    "    for k,v in  翻页_参数.items():\n",
    "        # requests_liepin reads the module-level `payload` (its `params` argument is not\n",
    "        # used inside its body), so the global has to be set before every call.\n",
    "        payload = v\n",
    "        df = requests_liepin( url, params = payload)\n",
    "\n",
    "        # Tag every row with the keyword and page index it was fetched for.\n",
    "        df = df.assign (keyword = key, curPage = k)\n",
    "\n",
    "        list_df.append(df)\n",
    "\n",
    "        # Pause for a random fraction of a second so consecutive requests are not\n",
    "        # fired back to back at the site.\n",
    "        time.sleep(random())\n",
    "\n",
    "# Stack all pages; reset_index keeps each row's within-page position as the `index` column.\n",
    "df_all = pd.concat(list_df).reset_index()\n",
    "df_all.index.name = '序'\n",
    "df_all"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 城市爬取（广州）"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['/zhaopin/?compkind=&dqs=&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=010&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=050020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=050090&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=030&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=060080&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=040&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=060020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=070020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=210040&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=280020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1',\n",
       " '/zhaopin/?compkind=&dqs=170020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ%7E_FrslumzzaHrHe3aSW0VTQ&d_sfrom=search_prime&d_ckId=4d26e12fa15cd5c60d452983b15c52bc&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1']"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Search page for keyword 广告 with dqs=050020; its filter panel lists the city links.\n",
    "url=\"https://www.liepin.com/zhaopin/?compkind=&dqs=050020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ~F5FSJAXvyHmQyODXqGxdVw&d_sfrom=search_prime&d_ckId=b6511cc2da93578c9d34605a028dacd1&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1\"\n",
    "r=session.get(url)\n",
    "\n",
    "# Anchors under the third <dl> of div.search-conditions whose href starts with /zhaopin.\n",
    "# NOTE: despite its name, xpath_城市 holds the matched elements, not an XPath string.\n",
    "xpath_城市=r.html.xpath(\"//div[@class='search-conditions']/dl[3]/dd/a[starts-with(@href,'/zhaopin')]\")\n",
    "\n",
    "# Link text -> href for each matched anchor.\n",
    "城市字典={\n",
    "    anchor.xpath(\"a/text()\")[0]: anchor.xpath(\"a/@href\")[0]\n",
    "    for anchor in xpath_城市\n",
    "}\n",
    "# The same hrefs as a plain list, in document order.\n",
    "城市列表=[anchor.xpath(\"a/@href\")[0] for anchor in xpath_城市]\n",
    "城市列表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>pageSize</th>\n",
       "      <th>sortFlag</th>\n",
       "      <th>key</th>\n",
       "      <th>siTag</th>\n",
       "      <th>d_sfrom</th>\n",
       "      <th>d_ckId</th>\n",
       "      <th>d_curPage</th>\n",
       "      <th>d_pageSize</th>\n",
       "      <th>d_headId</th>\n",
       "      <th>dqs</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>050020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>050090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>060080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>060020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>070020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>210040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>280020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>40</td>\n",
       "      <td>15</td>\n",
       "      <td>广告</td>\n",
       "      <td>us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ</td>\n",
       "      <td>search_prime</td>\n",
       "      <td>4d26e12fa15cd5c60d452983b15c52bc</td>\n",
       "      <td>0</td>\n",
       "      <td>40</td>\n",
       "      <td>b6511cc2da93578c9d34605a028dacd1</td>\n",
       "      <td>170020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   pageSize sortFlag key                                          siTag  \\\n",
       "0        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "1        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "2        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "3        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "4        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "5        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "6        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "7        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "8        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "9        40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "10       40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "11       40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "12       40       15  广告  us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ   \n",
       "\n",
       "         d_sfrom                            d_ckId d_curPage d_pageSize  \\\n",
       "0   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "1   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "2   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "3   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "4   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "5   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "6   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "7   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "8   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "9   search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "10  search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "11  search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "12  search_prime  4d26e12fa15cd5c60d452983b15c52bc         0         40   \n",
       "\n",
       "                            d_headId     dqs  \n",
       "0   b6511cc2da93578c9d34605a028dacd1     NaN  \n",
       "1   b6511cc2da93578c9d34605a028dacd1     010  \n",
       "2   b6511cc2da93578c9d34605a028dacd1     020  \n",
       "3   b6511cc2da93578c9d34605a028dacd1  050020  \n",
       "4   b6511cc2da93578c9d34605a028dacd1  050090  \n",
       "5   b6511cc2da93578c9d34605a028dacd1     030  \n",
       "6   b6511cc2da93578c9d34605a028dacd1  060080  \n",
       "7   b6511cc2da93578c9d34605a028dacd1     040  \n",
       "8   b6511cc2da93578c9d34605a028dacd1  060020  \n",
       "9   b6511cc2da93578c9d34605a028dacd1  070020  \n",
       "10  b6511cc2da93578c9d34605a028dacd1  210040  \n",
       "11  b6511cc2da93578c9d34605a028dacd1  280020  \n",
       "12  b6511cc2da93578c9d34605a028dacd1  170020  "
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def 城市参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "# Query parameters of the first city link, kept as a reference sample.\n",
    "城市参数=城市参数拆解( 城市列表[0])\n",
    "\n",
    "# One row per city link, one column per URL component; the `query` column is parsed below.\n",
    "df_城市=pd.DataFrame([urlparse(href) for href in 城市列表])\n",
    "\n",
    "# Flatten each query string to {name: first value}.\n",
    "# parse_qs skips blank values by default, so the `else 0` fallback never fires and a link\n",
    "# with an empty `dqs=` simply has no dqs entry (it appears as NaN in the frame).\n",
    "# Per the original note, dqs is the only parameter whose value varies between the links.\n",
    "df_城市_qs=pd.DataFrame([\n",
    "    {name: (vals[0] if len(vals[0])>=1 else 0) for name, vals in parse_qs(query).items()}\n",
    "    for query in df_城市['query']\n",
    "])\n",
    "df_城市_qs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>城市</th>\n",
       "      <th>城市代号</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>索引</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>全国</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>北京</td>\n",
       "      <td>010</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>上海</td>\n",
       "      <td>020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>广州</td>\n",
       "      <td>050020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>深圳</td>\n",
       "      <td>050090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>天津</td>\n",
       "      <td>030</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>苏州</td>\n",
       "      <td>060080</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>重庆</td>\n",
       "      <td>040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>南京</td>\n",
       "      <td>060020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>杭州</td>\n",
       "      <td>070020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>大连</td>\n",
       "      <td>210040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>成都</td>\n",
       "      <td>280020</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>武汉</td>\n",
       "      <td>170020</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    城市    城市代号\n",
       "索引            \n",
       "0   全国     NaN\n",
       "1   北京     010\n",
       "2   上海     020\n",
       "3   广州  050020\n",
       "4   深圳  050090\n",
       "5   天津     030\n",
       "6   苏州  060080\n",
       "7   重庆     040\n",
       "8   南京  060020\n",
       "9   杭州  070020\n",
       "10  大连  210040\n",
       "11  成都  280020\n",
       "12  武汉  170020"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "城市代号=pd.DataFrame(df_城市_qs['dqs'])\n",
    "\n",
    "城市代号.index.name='索引'\n",
    "城市名称=pd.DataFrame(城市字典.keys())\n",
    "城市名称.index.name='索引'\n",
    "城市编号=pd.merge(城市名称,城市代号,on='索引').rename(columns={0:\"城市\",\"dqs\":\"城市代号\"})\n",
    "城市编号"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 翻页"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{0: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [0]},\n",
       " 1: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [1]},\n",
       " 2: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [2]},\n",
       " 3: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [3]},\n",
       " 4: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [4]},\n",
       " 5: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [5]},\n",
       " 6: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [6]},\n",
       " 7: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [7]},\n",
       " 8: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [8]},\n",
       " 9: {'pageSize': ['40'],\n",
       "  'sortFlag': ['15'],\n",
       "  'key': ['广告'],\n",
       "  'siTag': ['us1vR3GVLJxdJU3nHEhbBQ~_FrslumzzaHrHe3aSW0VTQ'],\n",
       "  'd_sfrom': ['search_prime'],\n",
       "  'd_ckId': ['4d26e12fa15cd5c60d452983b15c52bc'],\n",
       "  'd_curPage': ['0'],\n",
       "  'd_pageSize': ['40'],\n",
       "  'd_headId': ['b6511cc2da93578c9d34605a028dacd1'],\n",
       "  'dqs': [''],\n",
       "  'curPage_int': [9]}}"
      ]
     },
     "execution_count": 46,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url=\"https://www.liepin.com/zhaopin/?compkind=&dqs=050020&pubTime=&pageSize=40&salary=&compTag=&sortFlag=15&compIds=&subIndustry=&jobKind=&industries=&compscale=&key=%E5%B9%BF%E5%91%8A&siTag=us1vR3GVLJxdJU3nHEhbBQ~F5FSJAXvyHmQyODXqGxdVw&d_sfrom=search_prime&d_ckId=b6511cc2da93578c9d34605a028dacd1&d_curPage=0&d_pageSize=40&d_headId=b6511cc2da93578c9d34605a028dacd1\"\n",
    "r=session.get(url)\n",
    "xpath_翻页a = '//div[@class=\"pagerbar\"]/a[starts-with(@href,\"/zhaopin\")]'\n",
    "城市列表=[x.xpath('//@href')[0] for x in r.html.xpath(xpath_翻页a)]\n",
    "df=pd.DataFrame([urlparse(x) for x in 城市列表 ])\n",
    "df_qs =pd.DataFrame([{ k:v[0] for k,v in parse_qs(x).items()}for x in df['query']])\n",
    "df_qs = df_qs.assign (curPage_int=df_qs.curPage.astype(int)) \n",
    "df_qs\n",
    "长度=int(df_qs.curPage_int.max()+1)\n",
    "def 翻页_参数拆解(url):\n",
    "    sixpart=urlparse(url)\n",
    "    out=parse_qs(sixpart.query)\n",
    "    return out\n",
    "\n",
    "翻页_参数模板=翻页_参数拆解(城市列表[0])\n",
    "翻页_参数模板\n",
    "def 翻页_参数调整(key,dqs,curPage_int):\n",
    "    参数=城市参数.copy()\n",
    "    参数['dqs']=dqs\n",
    "    参数['key']=key\n",
    "    参数['curPage_int']=curPage_int\n",
    "    return 参数\n",
    "#翻页_城市\n",
    "翻页_城市_参数={i:翻页_参数调整(key=['广告'],dqs=[''],curPage_int=[i]  )for i in  range(0,长度)  }\n",
    "翻页_城市_参数"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 指定城市关键词爬取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "广告 10\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>索引</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月13日</td>\n",
       "      <td>广告销售总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海盛闳广告有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929874517.shtml</td>\n",
       "      <td>https://www.liepin.com/company/12308481/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月14日</td>\n",
       "      <td>广告优化师（头条，广点通）</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海嵩恒网络科技股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929908883.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7877259/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-40k·13薪</td>\n",
       "      <td>2020年07月17日</td>\n",
       "      <td>广告投放总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>BIO Group(中国)</td>\n",
       "      <td>https://www.liepin.com/job/1929934427.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8440610/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>7-15k·12薪</td>\n",
       "      <td>2020年06月05日</td>\n",
       "      <td>广告客户经理</td>\n",
       "      <td>上海-花木</td>\n",
       "      <td>上海知行文化传媒有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1911201211.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9204875/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-30k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告发布系统后台开发工程师</td>\n",
       "      <td>上海</td>\n",
       "      <td>路盛文化传播(上海)有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1930049541.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9801013/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>35</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>30-60k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>品牌广告直客销售</td>\n",
       "      <td>上海,广州,北京</td>\n",
       "      <td>小红书</td>\n",
       "      <td>https://www.liepin.com/a/21047559.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>36</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>42-67k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>视频广告销售群总监</td>\n",
       "      <td></td>\n",
       "      <td>JR</td>\n",
       "      <td>https://www.liepin.com/a/20898991.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>37</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>40-70k·16薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告度量负责人 Solution engineer Leader</td>\n",
       "      <td>上海</td>\n",
       "      <td>国内知名独角兽移动互联网的科技企业</td>\n",
       "      <td>https://www.liepin.com/a/20540781.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>38</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>45-75k·16薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告算法负责人</td>\n",
       "      <td>上海,北京,杭州</td>\n",
       "      <td>国内头部上市互联网公司</td>\n",
       "      <td>https://www.liepin.com/a/20540681.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>39</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>15-20k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告客户经理</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海文化传播公司</td>\n",
       "      <td>https://www.liepin.com/a/20515479.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index    edu     经验          薪水           时间  \\\n",
       "索引                                                  \n",
       "0        0  本科及以上  5-10年  10-15k·12薪  2020年07月13日   \n",
       "1        1  本科及以上   3-5年  10-15k·12薪  2020年07月14日   \n",
       "2        2  本科及以上  5-10年  20-40k·13薪  2020年07月17日   \n",
       "3        3  大专及以上   1-3年   7-15k·12薪  2020年06月05日   \n",
       "4        4  本科及以上  5-10年  20-30k·12薪  2020年07月19日   \n",
       "..     ...    ...    ...         ...          ...   \n",
       "395     35   统招本科  5-10年  30-60k·12薪  2020年07月19日   \n",
       "396     36  本科及以上   3-5年  42-67k·12薪  2020年07月19日   \n",
       "397     37   统招本科   3-5年  40-70k·16薪  2020年07月19日   \n",
       "398     38   统招本科   3-5年  45-75k·16薪  2020年07月19日   \n",
       "399     39  本科及以上   经验不限  15-20k·12薪  2020年07月19日   \n",
       "\n",
       "                                   职称      公司地点               公司名称  \\\n",
       "索引                                                                   \n",
       "0                              广告销售总监        上海         上海盛闳广告有限公司   \n",
       "1                       广告优化师（头条，广点通）        上海     上海嵩恒网络科技股份有限公司   \n",
       "2                              广告投放总监        上海      BIO Group(中国)   \n",
       "3                              广告客户经理     上海-花木       上海知行文化传媒有限公司   \n",
       "4                       广告发布系统后台开发工程师        上海     路盛文化传播(上海)有限公司   \n",
       "..                                ...       ...                ...   \n",
       "395                          品牌广告直客销售  上海,广州,北京                小红书   \n",
       "396                         视频广告销售群总监                           JR   \n",
       "397  广告度量负责人 Solution engineer Leader        上海  国内知名独角兽移动互联网的科技企业   \n",
       "398                           广告算法负责人  上海,北京,杭州        国内头部上市互联网公司   \n",
       "399                            广告客户经理        上海           上海文化传播公司   \n",
       "\n",
       "                                              链结  \\\n",
       "索引                                                 \n",
       "0    https://www.liepin.com/job/1929874517.shtml   \n",
       "1    https://www.liepin.com/job/1929908883.shtml   \n",
       "2    https://www.liepin.com/job/1929934427.shtml   \n",
       "3    https://www.liepin.com/job/1911201211.shtml   \n",
       "4    https://www.liepin.com/job/1930049541.shtml   \n",
       "..                                           ...   \n",
       "395      https://www.liepin.com/a/21047559.shtml   \n",
       "396      https://www.liepin.com/a/20898991.shtml   \n",
       "397      https://www.liepin.com/a/20540781.shtml   \n",
       "398      https://www.liepin.com/a/20540681.shtml   \n",
       "399      https://www.liepin.com/a/20515479.shtml   \n",
       "\n",
       "                                        公司URL keyword  curPage  \n",
       "索引                                                              \n",
       "0    https://www.liepin.com/company/12308481/      广告        0  \n",
       "1     https://www.liepin.com/company/7877259/      广告        0  \n",
       "2     https://www.liepin.com/company/8440610/      广告        0  \n",
       "3     https://www.liepin.com/company/9204875/      广告        0  \n",
       "4     https://www.liepin.com/company/9801013/      广告        0  \n",
       "..                                        ...     ...      ...  \n",
       "395                                                广告        9  \n",
       "396                                                广告        9  \n",
       "397                                                广告        9  \n",
       "398                                                广告        9  \n",
       "399                                                广告        9  \n",
       "\n",
       "[400 rows x 12 columns]"
      ]
     },
     "execution_count": 53,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession\n",
    "session =HTMLSession()\n",
    "url=\"https://www.liepin.com/zhaopin/\"\n",
    "r=session.get(url)\n",
    "keywords = ['广告']\n",
    "list_df=list()\n",
    "for key in keywords:\n",
    "    翻页_城市_参数={i:翻页_参数调整(key=[key],dqs=['020'],curPage_int=[i]  )for i in  range(0,长度)  }\n",
    "    print (key,长度)\n",
    "    for k,v in  翻页_城市_参数.items():\n",
    "        payload = v\n",
    "        df = requests_liepin( url, params = payload)\n",
    "\n",
    "\n",
    "        df = df.assign (keyword = key)  # 区分  keyword    \n",
    "        df = df.assign (curPage = k)  # 区分  curPage   \n",
    "        \n",
    "        list_df.append(df)\n",
    "        \n",
    "df_指定城市= pd.concat(list_df).reset_index()\n",
    "df_指定城市.index.name = '索引'\n",
    "df_指定城市"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 全国范围"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "广告 10\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>索引</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月13日</td>\n",
       "      <td>广告销售总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海盛闳广告有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929874517.shtml</td>\n",
       "      <td>https://www.liepin.com/company/12308481/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月01日</td>\n",
       "      <td>广告销售经理</td>\n",
       "      <td>北京</td>\n",
       "      <td>派瑞威行</td>\n",
       "      <td>https://www.liepin.com/job/1929546261.shtml</td>\n",
       "      <td>https://www.liepin.com/company/1322645/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-40k·13薪</td>\n",
       "      <td>2020年07月17日</td>\n",
       "      <td>广告投放总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>BIO Group(中国)</td>\n",
       "      <td>https://www.liepin.com/job/1929934427.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8440610/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月14日</td>\n",
       "      <td>广告优化师（头条，广点通）</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海嵩恒网络科技股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929908883.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7877259/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>17-25k·13薪</td>\n",
       "      <td>2020年07月09日</td>\n",
       "      <td>海外广告优化leader</td>\n",
       "      <td>北京</td>\n",
       "      <td>信征(北京)信息技术有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929780845.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10006489/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>35</td>\n",
       "      <td>学历不限</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>5-8k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>服装试衣广告工作</td>\n",
       "      <td>杭州-上城区</td>\n",
       "      <td>上海应承服饰有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929835399.shtml</td>\n",
       "      <td>https://www.liepin.com/company/12213985/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>36</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告营销主管</td>\n",
       "      <td></td>\n",
       "      <td>喀斯玛(北京)科技有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929826605.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10100863/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>37</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>15-30k·13薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>海外广告投放（Facebook、Google）</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海弘贯网络科技有限公司广州分公司</td>\n",
       "      <td>https://www.liepin.com/job/1929812595.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10105391/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>38</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>8-12k·13薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>游戏广告投放/广点通/头条投放</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海弘贯网络科技有限公司广州分公司</td>\n",
       "      <td>https://www.liepin.com/job/1929812539.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10105391/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>39</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>6-12k·13薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告设计/平面设计</td>\n",
       "      <td>广州-天河区</td>\n",
       "      <td>上海弘贯网络科技有限公司广州分公司</td>\n",
       "      <td>https://www.liepin.com/job/1929812003.shtml</td>\n",
       "      <td>https://www.liepin.com/company/10105391/</td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index    edu     经验          薪水           时间                       职称  \\\n",
       "索引                                                                           \n",
       "0        0  本科及以上  5-10年  10-15k·12薪  2020年07月13日                   广告销售总监   \n",
       "1        1   统招本科   1-3年  10-15k·12薪  2020年07月01日                   广告销售经理   \n",
       "2        2  本科及以上  5-10年  20-40k·13薪  2020年07月17日                   广告投放总监   \n",
       "3        3  本科及以上   3-5年  10-15k·12薪  2020年07月14日            广告优化师（头条，广点通）   \n",
       "4        4  大专及以上   3-5年  17-25k·13薪  2020年07月09日             海外广告优化leader   \n",
       "..     ...    ...    ...         ...          ...                      ...   \n",
       "395     35   学历不限   经验不限    5-8k·12薪  2020年07月19日                 服装试衣广告工作   \n",
       "396     36  本科及以上   3-5年  10-15k·12薪  2020年07月19日                   广告营销主管   \n",
       "397     37  大专及以上   1-3年  15-30k·13薪  2020年07月19日  海外广告投放（Facebook、Google）   \n",
       "398     38  大专及以上   1-3年   8-12k·13薪  2020年07月19日          游戏广告投放/广点通/头条投放   \n",
       "399     39  大专及以上   1-3年   6-12k·13薪  2020年07月19日                广告设计/平面设计   \n",
       "\n",
       "       公司地点               公司名称                                           链结  \\\n",
       "索引                                                                            \n",
       "0        上海         上海盛闳广告有限公司  https://www.liepin.com/job/1929874517.shtml   \n",
       "1        北京               派瑞威行  https://www.liepin.com/job/1929546261.shtml   \n",
       "2        上海      BIO Group(中国)  https://www.liepin.com/job/1929934427.shtml   \n",
       "3        上海     上海嵩恒网络科技股份有限公司  https://www.liepin.com/job/1929908883.shtml   \n",
       "4        北京     信征(北京)信息技术有限公司  https://www.liepin.com/job/1929780845.shtml   \n",
       "..      ...                ...                                          ...   \n",
       "395  杭州-上城区         上海应承服饰有限公司  https://www.liepin.com/job/1929835399.shtml   \n",
       "396              喀斯玛(北京)科技有限公司  https://www.liepin.com/job/1929826605.shtml   \n",
       "397  广州-天河区  上海弘贯网络科技有限公司广州分公司  https://www.liepin.com/job/1929812595.shtml   \n",
       "398  广州-天河区  上海弘贯网络科技有限公司广州分公司  https://www.liepin.com/job/1929812539.shtml   \n",
       "399  广州-天河区  上海弘贯网络科技有限公司广州分公司  https://www.liepin.com/job/1929812003.shtml   \n",
       "\n",
       "                                        公司URL keyword  curPage  \n",
       "索引                                                              \n",
       "0    https://www.liepin.com/company/12308481/      广告        0  \n",
       "1     https://www.liepin.com/company/1322645/      广告        0  \n",
       "2     https://www.liepin.com/company/8440610/      广告        0  \n",
       "3     https://www.liepin.com/company/7877259/      广告        0  \n",
       "4    https://www.liepin.com/company/10006489/      广告        0  \n",
       "..                                        ...     ...      ...  \n",
       "395  https://www.liepin.com/company/12213985/      广告        9  \n",
       "396  https://www.liepin.com/company/10100863/      广告        9  \n",
       "397  https://www.liepin.com/company/10105391/      广告        9  \n",
       "398  https://www.liepin.com/company/10105391/      广告        9  \n",
       "399  https://www.liepin.com/company/10105391/      广告        9  \n",
       "\n",
       "[400 rows x 12 columns]"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import time\n",
    "from random import random\n",
    "from requests_html import HTMLSession\n",
    "session =HTMLSession()\n",
    "url=\"https://www.liepin.com/zhaopin/\"\n",
    "r=session.get(url)\n",
    "keywords = ['广告']\n",
    "list_df=list()\n",
    "for key in keywords:\n",
    "    翻页_城市_参数={i:翻页_参数调整(key=[key],dqs=[''],curPage_int=[i]  )for i in  range(0,长度)  }\n",
    "    print (key,长度)\n",
    "    for k,v in  翻页_城市_参数.items():\n",
    "        payload = v\n",
    "        df = requests_liepin( url, params = payload)\n",
    "\n",
    "\n",
    "        df = df.assign (keyword = key)  # 区分  keyword    \n",
    "        df = df.assign (curPage = k)  # 区分  curPage    \n",
    "        list_df.append(df)\n",
    "        \n",
    "df_全国= pd.concat(list_df).reset_index()\n",
    "df_全国.index.name = '索引'\n",
    "df_全国"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>edu</th>\n",
       "      <th>经验</th>\n",
       "      <th>薪水</th>\n",
       "      <th>时间</th>\n",
       "      <th>职称</th>\n",
       "      <th>公司地点</th>\n",
       "      <th>公司名称</th>\n",
       "      <th>链结</th>\n",
       "      <th>公司URL</th>\n",
       "      <th>keyword</th>\n",
       "      <th>curPage</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>索引</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月13日</td>\n",
       "      <td>广告销售总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海盛闳广告有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929874517.shtml</td>\n",
       "      <td>https://www.liepin.com/company/12308481/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>10-15k·12薪</td>\n",
       "      <td>2020年07月14日</td>\n",
       "      <td>广告优化师（头条，广点通）</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海嵩恒网络科技股份有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1929908883.shtml</td>\n",
       "      <td>https://www.liepin.com/company/7877259/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-40k·13薪</td>\n",
       "      <td>2020年07月17日</td>\n",
       "      <td>广告投放总监</td>\n",
       "      <td>上海</td>\n",
       "      <td>BIO Group(中国)</td>\n",
       "      <td>https://www.liepin.com/job/1929934427.shtml</td>\n",
       "      <td>https://www.liepin.com/company/8440610/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>大专及以上</td>\n",
       "      <td>1-3年</td>\n",
       "      <td>7-15k·12薪</td>\n",
       "      <td>2020年06月05日</td>\n",
       "      <td>广告客户经理</td>\n",
       "      <td>上海-花木</td>\n",
       "      <td>上海知行文化传媒有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1911201211.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9204875/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>20-30k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告发布系统后台开发工程师</td>\n",
       "      <td>上海</td>\n",
       "      <td>路盛文化传播(上海)有限公司</td>\n",
       "      <td>https://www.liepin.com/job/1930049541.shtml</td>\n",
       "      <td>https://www.liepin.com/company/9801013/</td>\n",
       "      <td>广告</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>395</th>\n",
       "      <td>35</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>5-10年</td>\n",
       "      <td>30-60k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>品牌广告直客销售</td>\n",
       "      <td>上海,广州,北京</td>\n",
       "      <td>小红书</td>\n",
       "      <td>https://www.liepin.com/a/21047559.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>396</th>\n",
       "      <td>36</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>42-67k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>视频广告销售群总监</td>\n",
       "      <td></td>\n",
       "      <td>JR</td>\n",
       "      <td>https://www.liepin.com/a/20898991.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>397</th>\n",
       "      <td>37</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>40-70k·16薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告度量负责人 Solution engineer Leader</td>\n",
       "      <td>上海</td>\n",
       "      <td>国内知名独角兽移动互联网的科技企业</td>\n",
       "      <td>https://www.liepin.com/a/20540781.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>398</th>\n",
       "      <td>38</td>\n",
       "      <td>统招本科</td>\n",
       "      <td>3-5年</td>\n",
       "      <td>45-75k·16薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告算法负责人</td>\n",
       "      <td>上海,北京,杭州</td>\n",
       "      <td>国内头部上市互联网公司</td>\n",
       "      <td>https://www.liepin.com/a/20540681.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>399</th>\n",
       "      <td>39</td>\n",
       "      <td>本科及以上</td>\n",
       "      <td>经验不限</td>\n",
       "      <td>15-20k·12薪</td>\n",
       "      <td>2020年07月19日</td>\n",
       "      <td>广告客户经理</td>\n",
       "      <td>上海</td>\n",
       "      <td>上海文化传播公司</td>\n",
       "      <td>https://www.liepin.com/a/20515479.shtml</td>\n",
       "      <td></td>\n",
       "      <td>广告</td>\n",
       "      <td>9</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>400 rows × 12 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index    edu     经验          薪水           时间  \\\n",
       "索引                                                  \n",
       "0        0  本科及以上  5-10年  10-15k·12薪  2020年07月13日   \n",
       "1        1  本科及以上   3-5年  10-15k·12薪  2020年07月14日   \n",
       "2        2  本科及以上  5-10年  20-40k·13薪  2020年07月17日   \n",
       "3        3  大专及以上   1-3年   7-15k·12薪  2020年06月05日   \n",
       "4        4  本科及以上  5-10年  20-30k·12薪  2020年07月19日   \n",
       "..     ...    ...    ...         ...          ...   \n",
       "395     35   统招本科  5-10年  30-60k·12薪  2020年07月19日   \n",
       "396     36  本科及以上   3-5年  42-67k·12薪  2020年07月19日   \n",
       "397     37   统招本科   3-5年  40-70k·16薪  2020年07月19日   \n",
       "398     38   统招本科   3-5年  45-75k·16薪  2020年07月19日   \n",
       "399     39  本科及以上   经验不限  15-20k·12薪  2020年07月19日   \n",
       "\n",
       "                                   职称      公司地点               公司名称  \\\n",
       "索引                                                                   \n",
       "0                              广告销售总监        上海         上海盛闳广告有限公司   \n",
       "1                       广告优化师（头条，广点通）        上海     上海嵩恒网络科技股份有限公司   \n",
       "2                              广告投放总监        上海      BIO Group(中国)   \n",
       "3                              广告客户经理     上海-花木       上海知行文化传媒有限公司   \n",
       "4                       广告发布系统后台开发工程师        上海     路盛文化传播(上海)有限公司   \n",
       "..                                ...       ...                ...   \n",
       "395                          品牌广告直客销售  上海,广州,北京                小红书   \n",
       "396                         视频广告销售群总监                           JR   \n",
       "397  广告度量负责人 Solution engineer Leader        上海  国内知名独角兽移动互联网的科技企业   \n",
       "398                           广告算法负责人  上海,北京,杭州        国内头部上市互联网公司   \n",
       "399                            广告客户经理        上海           上海文化传播公司   \n",
       "\n",
       "                                              链结  \\\n",
       "索引                                                 \n",
       "0    https://www.liepin.com/job/1929874517.shtml   \n",
       "1    https://www.liepin.com/job/1929908883.shtml   \n",
       "2    https://www.liepin.com/job/1929934427.shtml   \n",
       "3    https://www.liepin.com/job/1911201211.shtml   \n",
       "4    https://www.liepin.com/job/1930049541.shtml   \n",
       "..                                           ...   \n",
       "395      https://www.liepin.com/a/21047559.shtml   \n",
       "396      https://www.liepin.com/a/20898991.shtml   \n",
       "397      https://www.liepin.com/a/20540781.shtml   \n",
       "398      https://www.liepin.com/a/20540681.shtml   \n",
       "399      https://www.liepin.com/a/20515479.shtml   \n",
       "\n",
       "                                        公司URL keyword  curPage  \n",
       "索引                                                              \n",
       "0    https://www.liepin.com/company/12308481/      广告        0  \n",
       "1     https://www.liepin.com/company/7877259/      广告        0  \n",
       "2     https://www.liepin.com/company/8440610/      广告        0  \n",
       "3     https://www.liepin.com/company/9204875/      广告        0  \n",
       "4     https://www.liepin.com/company/9801013/      广告        0  \n",
       "..                                        ...     ...      ...  \n",
       "395                                                广告        9  \n",
       "396                                                广告        9  \n",
       "397                                                广告        9  \n",
       "398                                                广告        9  \n",
       "399                                                广告        9  \n",
       "\n",
       "[400 rows x 12 columns]"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_指定城市.to_excel(\"指定行业类别关键词爬取.xlsx\",sheet_name=\"指定行业类别关键词爬取\")\n",
    "df_一个关键词爬取.to_excel(\"一个关键词爬取.xlsx\",sheet_name=\"一个关键词爬取\")\n",
    "df_全国.to_excel(\"指定公司行业关键词爬取.xlsx\",sheet_name=\"指定公司行业关键词爬取\")\n",
    "df_指定城市"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {
    "height": "calc(100% - 180px)",
    "left": "10px",
    "top": "150px",
    "width": "256px"
   },
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
